Code
library(tidyverse)
library(magrittr)
library(gt)
library(patchwork)
library(SummarizedExperiment)
library(tidySummarizedExperiment)
library(MultiAssayExperiment)
tmp <- fs::dir_map("R scripts/", source)
theme_set(theme_light())library(tidyverse)
library(magrittr)
library(gt)
library(patchwork)
library(SummarizedExperiment)
library(tidySummarizedExperiment)
library(MultiAssayExperiment)
tmp <- fs::dir_map("R scripts/", source)
theme_set(theme_light())MultiAssayExperiment objectdata_source <- "real"
mae_files <-
fs::dir_ls(
str_c(
get_data_dir(data_source = data_source),
"02 MAEs/"
),
regexp = "mae_full_.*\\.rds$"
) |>
sort(decreasing = TRUE) |>
magrittr::extract(1)
mae <- readRDS(mae_files)
rm(mae_files)tmp <-
dplyr::inner_join(
mae[["mg"]] |>
as_tibble() |>
filter(!is.na(LBP)) |>
filter(!is.na(rel_abs_bact)) |>
select(.feature, .sample, rel_abs_bact, sample_type) |>
dplyr::rename(
mg_rel_ab_bact = rel_abs_bact
),
mae[["qPCR"]] |>
as_tibble() |>
filter(!is.na(LBP)) |>
select(.feature, .sample, copies_per_swab_med, copies_per_swab_cv, strain_group_qpcr),
by = join_by(.feature, .sample)
)
tmp |>
ggplot() +
aes(x = mg_rel_ab_bact, y = copies_per_swab_med, color = sample_type) +
geom_point(alpha = 0.3, size = 1) +
facet_wrap(~ strain_group_qpcr + .feature) +
scale_y_log10() +
scale_x_log10()tmp |>
dplyr::count(.feature, mg_rel_ab_bact > 0, copies_per_swab_med > 0) |>
ggplot() +
aes(x = `mg_rel_ab_bact > 0`, y = `copies_per_swab_med > 0`) +
geom_tile(aes(fill = n)) +
geom_text(aes(label = n)) +
scale_fill_gradient(low = "white", high = "dodgerblue3") +
facet_wrap(~ .feature) tmp |>
dplyr::count(.feature, mg_rel_ab_bact > 0, copies_per_swab_med > 1e+05) |>
ggplot() +
aes(x = `mg_rel_ab_bact > 0`, y = `copies_per_swab_med > 1e+05`) +
geom_tile(aes(fill = n)) +
geom_text(aes(label = n)) +
scale_fill_gradient(low = "white", high = "dodgerblue3") +
facet_wrap(~ .feature) tmp |>
dplyr::count(.feature, mg_rel_ab_bact > 0, copies_per_swab_med > 10000) |>
ggplot() +
aes(x = `mg_rel_ab_bact > 0`, y = `copies_per_swab_med > 10000`) +
geom_tile(aes(fill = n)) +
geom_text(aes(label = n)) +
scale_fill_gradient(low = "white", high = "dodgerblue3") +
facet_wrap(~ .feature) TODO sensitivity-specificity curves for each LBP
TODO
weekly_samples_from_randomized_participants <-
dplyr::inner_join(
tibble(uid = mae[["mg"]]$uid),
tibble(uid = mae[["amplicon"]]$uid),
by = join_by(uid)
) |>
dplyr::inner_join(
mae@colData |> as.data.frame() |> as_tibble() |>
filter(randomized) |>
select(uid, pid, visit_code, randomized, arm, visit_type),
by = join_by(uid)
) |>
filter(visit_type == "Clinic")
mae_sub <- mae[, weekly_samples_from_randomized_participants$uid]random_pids <- mae_sub$pid |> table() |> sort(decreasing = TRUE) |> names() |> head(10)map(
random_pids,
plot_data_for_pid,
mae_sub = mae_sub
)[[1]]
[[2]]
[[3]]
[[4]]
[[5]]
[[6]]
[[7]]
[[8]]
[[9]]
[[10]]
all_samples_from_randomized_participants <-
dplyr::full_join(
tibble(uid = mae[["mg"]]$uid),
tibble(uid = mae[["amplicon"]]$uid),
by = join_by(uid)
) |>
dplyr::inner_join(
mae@colData |> as.data.frame() |> as_tibble() |>
filter(randomized) |>
select(uid, pid, visit_code, randomized, arm),
by = join_by(uid)
)
mae_sub <- mae[, all_samples_from_randomized_participants$uid]# random_pids <- mae_sub$pid |> table() |> sort(decreasing = TRUE) |> names() |> head(10)map(
random_pids,
plot_data_for_pid,
mae_sub = mae_sub
)[[1]]
[[2]]
[[3]]
[[4]]
[[5]]
[[6]]
[[7]]
[[8]]
[[9]]
[[10]]
“Guessing the arms” based on the MG and qPCR data
if at least > 3% of any LBP115-specific strains or > 10% of all LBP115-specific strains in the MG data at any time-point -> “115”
if in group 4 -> overlap arm
if at least 3% of any LBP106 strains or > 10% of all LBP106 strains in the qPCR data at any time-point -> “106 3 or 7”
otherwise -> placebo
TODO
The primary outcome is “colonization with any of the L. crispatus strains contained in the LBP by 5 weeks of follow-up as assessed by metagenomic sequencing of the vaginal microbial community with detection of any one of LBP strains at >5% relative abundance or a combination of the strains accounting for >10% relative abundance by metagenomics.”
We compute, at each visit and for each participant, the total relative abundance of all LBP strains or the maximum relative abundance of any LBP strain.
If the total relative abundance of all LBP strains is larger than 0.1 or the maximum relative abundance of any LBP strain is larger than 0.05, we consider that the LBP achieved colonization at that visit.
colonization <-
mae[["mg"]] |>
as_tibble() |>
filter(sample_type == "Clinical sample") |>
filter(!is.na(LBP)) |>
group_by(.sample) |>
summarize(
n = n(),
total_LBP_rel_ab_at = sum(rel_abs_bact),
max_LBP_rel_ab_at = max(rel_abs_bact),
n_detected_strains_at = sum(rel_abs_bact > 0),
n_detected_LC106_strains_at = sum(rel_abs_bact[LBP == "LC-106 & LC-115"] > 0),
n_detected_LC115_only_strains_at = sum(rel_abs_bact[LBP == "LC-115"] > 0)
) |>
mutate(
colonized_at = (total_LBP_rel_ab_at > 0.1) | (max_LBP_rel_ab_at > 0.05)
) colonization <-
colonization |>
dplyr::left_join(
mae@colData |> as_tibble() |>
select(uid, pid, visit_code, randomized, arm, location) |>
dplyr::rename(.sample = uid),
by = join_by(.sample)
) |>
arrange(pid, visit_code) colonization |>
ggplot() +
aes(x = visit_code, y = pid, fill = colonized_at) +
geom_tile() +
facet_grid(
randomized + arm ~ .,
scales = "free_y", space = "free_y", labeller = label_both
) +
theme(
strip.text.y = element_text(angle = 0)
)From the colonization status at each visit, we can compute the colonization status by each visit.
A participant is considered to have colonized by a visit if they have been colonized at that visit or any previous visit, starting from their post-product visit (“1200” for all groups).
Since we have some missing visits, we impute these as “not colonized”
colonization <-
colonization |>
select(-c(arm, randomized, location)) |>
dplyr::full_join(
colonization |> select(pid, arm, randomized, location) |> distinct() |>
expand_grid(
visit_code = unique(colonization$visit_code) |> sort()
),
by = join_by(pid, visit_code)
)colonization |>
ggplot() +
aes(x = visit_code, y = pid, fill = colonized_at) +
geom_tile() +
facet_grid(
randomized + arm ~ .,
scales = "free_y", space = "free_y", labeller = label_both
) +
theme(
strip.text.y = element_text(angle = 0)
)colonization <-
colonization |>
arrange(pid, visit_code) |>
mutate(colonized_at = colonized_at |> replace_na(FALSE)) |>
group_by(pid) |>
mutate(
tmp = ifelse(as.numeric(visit_code) < 1200, FALSE, colonized_at),
colonized_by = cummax(tmp) |> as.logical()
) |>
ungroup() |>
select(-tmp)colonization |>
ggplot() +
aes(x = visit_code, y = pid, fill = colonized_by) +
geom_tile() +
annotate(
geom = "rect",
xmin = 10.5, xmax = 11.5, ymin = -Inf, ymax = Inf,
col = "black", alpha = 0, linewidth = 1
) +
facet_grid(
randomized + arm ~ .,
scales = "free_y", space = "free_y", labeller = label_both
) +
theme(
strip.text.y = element_text(angle = 0)
)primary_outcomes assay to MAEse_primary <-
SummarizedExperiment(
assays = list(
outcome =
colonization |>
# filter(!is.na(pid)) |>
# mutate(.sample = str_c(pid, "_", visit_code)) |>
filter(!is.na(.sample)) |>
select(.sample, colonized_at, colonized_by) |>
as.data.frame() |>
column_to_rownames(".sample") |> t()
)
)
mae <- c(mae, list(primary_outcomes = se_primary))MultiAssayExperiment objectsWe save the “master” MAE (with all assays); but if needed for publication, we can subset the MAE to only include the assays/data of interest.
saveRDS(
mae,
str_c(
get_data_dir(data_source = data_source),
"03 QCed MAEs/",
"mae_full_", today() |> str_remove_all("-"), ".rds"
)
)